library(data.table)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5 ✔ purrr 0.3.4
## ✔ tibble 3.1.6 ✔ dplyr 1.0.7
## ✔ tidyr 1.1.4 ✔ stringr 1.4.0
## ✔ readr 2.1.1 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::between() masks data.table::between()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::first() masks data.table::first()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::last() masks data.table::last()
## ✖ purrr::transpose() masks data.table::transpose()
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(pracma)
##
## Attaching package: 'pracma'
## The following object is masked from 'package:purrr':
##
## cross
df <- read.csv("wolfberry.csv", row.names=NULL)
df[df$Y == 0, 1] = "LBPs"
df[df$Y == 2, 1] = "Dextran"
df[df$Y == 3, 1] = "Maltodextrin"
df[df$Y == 4, 1] = "Starch"
There are two general methods to perform PCA in R :
The function princomp() uses the spectral decomposition approach. The functions prcomp() and PCA()[FactoMineR] use the singular value decomposition (SVD).
inclusion1 <- paste0("X", c(1:779))
inclusion2 <- paste0("X", c(1038:1870))
inclusion3 <- paste0("X", c(1142:1870))
inclusion4 <- paste0("X", c(1298:1870))
df_sub <- df[, c("C", "Y", inclusion3)]
df.active <- df_sub %>% filter(Y != 1)
df.active <- df.active %>% filter(C == 0 | C == 0.1)
df.active_x <- df.active %>% select(-c(Y, C))
dim(df.active_x)
## [1] 200 729
We apply the SVD-based PCA to minimize the individual variance.
value_prime <- pracma::gradient(as.numeric(df.active_x[1, ]), h1 = 1)
plot(value_prime, type = "l")
plot(as.numeric(df.active_x[1, ]), type = "l")
derivative <- data.frame()
for (i in c(1:nrow(df.active_x))) {
value_prime <- pracma::gradient(as.numeric(df.active_x[i, ]), h1 = 1)
value_prime_prime <- pracma::gradient(value_prime, h1 = 1)
#value_prime <- c(value_prime, value_prime_prime)
derivative <- rbind(derivative, value_prime)
}
res.pca <- prcomp(derivative, scale = FALSE)
fviz_eig(res.pca)
fviz_pca_ind(res.pca,
axes=c(1,2),
label="none",
habillage=df.active$Y,
addEllipses=TRUE,
ellipse.level=0.95
)
https://plotly.com/r/pca-visualization/
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(stats)
prin_comp <- res.pca
explained_variance_ratio <- summary(prin_comp)[["importance"]]['Proportion of Variance',]
explained_variance_ratio <- 100 * explained_variance_ratio
components <- prin_comp[["x"]]
components <- data.frame(components)
components <- cbind(components, df.active$Y)
components$PC3 <- -components$PC3
components$PC2 <- -components$PC2
axis = list(showline=FALSE,
zeroline=FALSE,
gridcolor='#ffff',
ticklen=4,
titlefont=list(size=13))
fig <- components %>%
plot_ly() %>%
add_trace(
type = 'splom',
dimensions = list(
list(label=paste('PC 1 (',toString(round(explained_variance_ratio[1],1)),'%)',sep = ''), values=~PC1),
list(label=paste('PC 2 (',toString(round(explained_variance_ratio[2],1)),'%)',sep = ''), values=~PC2),
list(label=paste('PC 3 (',toString(round(explained_variance_ratio[3],1)),'%)',sep = ''), values=~PC3),
list(label=paste('PC 4 (',toString(round(explained_variance_ratio[4],1)),'%)',sep = ''), values=~PC4)
),
color = ~factor(`df.active$Y`), colors = c('#636EFA','#EF553B','#00CC96', '#F2C43D')
) %>%
style(diagonal = list(visible = FALSE)) %>%
layout(
legend=list(title=list(text='color')),
hovermode='closest',
dragmode= 'select',
plot_bgcolor='rgba(240,240,240, 0.95)',
xaxis=list(domain=NULL, showline=F, zeroline=F, gridcolor='#ffff', ticklen=4),
yaxis=list(domain=NULL, showline=F, zeroline=F, gridcolor='#ffff', ticklen=4),
xaxis2=axis,
xaxis3=axis,
xaxis4=axis,
yaxis2=axis,
yaxis3=axis,
yaxis4=axis
)
fig
prin_comp <- res.pca
components <- prin_comp[["x"]]
components <- data.frame(components)
components$PC3 <- -components$PC3
components$PC2 <- -components$PC2
components <- cbind(components, df.active$Y)
tot_explained_variance_ratio <- summary(prin_comp)[["importance"]]['Proportion of Variance',]
tot_explained_variance_ratio <- 100 * sum(tot_explained_variance_ratio)
#tit = 'Total Explained Variance = 99.48'
fig <- plot_ly(components, x = ~PC1, y = ~PC2, z = ~PC3, color = ~factor(`df.active$Y`), colors = c('#636EFA','#EF553B','#00CC96', '#F2C43D')) %>%
add_markers(size = 30)
fig <- fig %>%
layout(
scene = list(bgcolor = "#e5ecf6")
)
fig